## summary of the output from the 2d DP model with estimated alpha
##
##

library(coda)
load("pairData.Rda")


load("out-2dDP-nofraud-balanced-newIDs.Rda")
out <- out.2dDP



theta1s <- out[, grep("theta1", colnames(out))]
theta2s <- out[, grep("theta2", colnames(out))]
gammas <- out[, grep("gamma", colnames(out))]
alpha <- out[, grep("alpha", colnames(out))] 

M <- nrow(out) ## number of MCMC draws

## calculate respondent-statement-specific in-sample prediction error rates
pairData.aug <- pairData
pairData.aug$post.prob.correct <- NA
for (i in 1:nrow(pairData.aug)){
    theta1.vec.s1 <- theta1s[,paste("theta1",
                                    pairData.aug$statementID1[i], sep=".")]
    theta2.vec.s1 <- theta2s[,paste("theta2",
                                    pairData.aug$statementID1[i], sep=".")]
    
    theta1.vec.s2 <- theta1s[,paste("theta1",
                                  pairData.aug$statementID2[i], sep=".")]
    theta2.vec.s2 <- theta2s[,paste("theta2",
                                  pairData.aug$statementID2[i], sep=".")]

    gamma.vec <- gammas[,paste("gamma",
                               pairData.aug$respondent[i], sep=".")]
    g1.vec <- cos(gamma.vec)
    g2.vec <- sin(gamma.vec)

    if (pairData.aug$choice[i] == pairData.aug$statementID1[i]){
        pairData.aug$post.prob.correct[i] = mean(pnorm(
            g1.vec * theta1.vec.s1 + g2.vec * theta2.vec.s1 -
            g1.vec*theta1.vec.s2 - g2.vec * theta2.vec.s2
        ))
    }
    if (pairData.aug$choice[i] == pairData.aug$statementID2[i]){
        pairData.aug$post.prob.correct[i] = mean(pnorm(
            g1.vec * theta1.vec.s2 + g2.vec * theta2.vec.s2 -
            g1.vec*theta1.vec.s1 - g2.vec * theta2.vec.s1
        ))
    }

    if (i %% 1000 == 0){
        cat("i =", i, "  of", nrow(pairData.aug), "\n")
    }
}


## > mean(pairData.aug$post.prob.correct)
## [1] 0.5673278
## > median(pairData.aug$post.prob.correct)
## [1] 0.5625114
##> mean(pairData.aug$post.prob.correct < 0.5)
##[1] 0.330114
hist(pairData.aug$post.prob.correct, nclass=100, col="dodgerblue")




## aggregate to respondent
resp.id <- sort(unique(pairData.aug$respondent))
n.resp <- length(resp.id)
resp.post.prob.correct <- rep(NA, n.resp)

for (i in 1:n.resp){
    data.sub <- pairData.aug[pairData.aug$respondent == resp.id[i],]
    resp.post.prob.correct[i] <- mean(data.sub$post.prob.correct)
}

## > mean(resp.post.prob.correct)
## [1] 0.5674421
## > median(resp.post.prob.correct)
## [1] 0.5537443
## > 
## > mean(resp.post.prob.correct < 0.53)
## [1] 0.3075162
## > mean(resp.post.prob.correct < 0.5)
## [1] 0.1098817
## > 
##> mean(resp.post.prob.correct > 0.6)
##[1] 0.2479969
hist(resp.post.prob.correct, nclass=100, col=rgb(0, 0.4, 0.7))



## aggregate to statement
statement.id <- sort(unique(c(pairData.aug$statementID1,
                              pairData.aug$statementID2)))
n.statements <- length(statement.id)
statement.post.prob.correct <- rep(NA, n.statements)

for (i in 1:n.statements){
    data.sub <- pairData.aug[pairData.aug$statementID1 == statement.id[i] |
                            pairData.aug$statementID2 == statement.id[i],]
    statement.post.prob.correct[i] <- mean(data.sub$post.prob.correct)
}


## > mean(statement.post.prob.correct)
## [1] 0.5673269
## > median(statement.post.prob.correct)
## [1] 0.5618388
##>  mean(statement.post.prob.correct > 0.6)
##[1] 0.0952381
##>  sum(statement.post.prob.correct > 0.6)
##[1] 4
hist(statement.post.prob.correct, nclass=20, col="dodgerblue")





## compare rank order correlation of theta point estimates and objective truth
## to rank order correlation of theta point estimates and valence
##


statements <- read.csv("./selectedStatementsPunctuationFixed-balanced.csv", header=TRUE, stringsAsFactors=FALSE)

statements <- statements[statements$keep==1,]

valence <- statements$valence
valence[valence=="R"] <- 1
valence[valence=="N"] <- 0
valence[valence=="L"] <- -1
valence <- as.numeric(valence)

obj.truth.6pt <- rep(NA, nrow(statements))
obj.truth.6pt[statements$rating == "pants-fire"] <- 0
obj.truth.6pt[statements$rating == "FALSE"] <- 1
obj.truth.6pt[statements$rating == "barely-true"] <- 2
obj.truth.6pt[statements$rating == "half-true"] <- 3
obj.truth.6pt[statements$rating == "mostly-true"] <- 4
obj.truth.6pt[statements$rating == "TRUE"] <- 5




gamma.hat <- colMeans(gammas)
theta1.hat <- colMeans(theta1s)
theta2.hat <- colMeans(theta2s)


gamma.range = seq(from=min(gamma.hat), to=max(gamma.hat), length.out=15)

valence.cor <- NULL
truth.cor <- NULL
for (gamma in gamma.range){
    theta.hat <- cos(gamma) * theta1.hat + sin(gamma) * theta2.hat
    valence.cor <- c(valence.cor,
                          cor(valence, theta.hat, method="spearman"))
    truth.cor <- c(truth.cor,
                          cor(obj.truth.6pt, theta.hat, method="spearman"))    
}

library(RColorBrewer)
col.pal <- rev(brewer.pal(11, "RdYlBu"))
cor.2.col <- function(cor.vals, col.pal){
    breaks <- seq(from=-1, to=1, by=2/11)
    col.vals <- rep(NA, length(cor.vals))
    for (i in 1:length(cor.vals)){
        cval <- cor.vals[i]
        if (cval < breaks[2]){
            col.vals[i] <- col.pal[1]
        }
        if (cval >= breaks[2] & cval < breaks[3]){
            col.vals[i] <- col.pal[2]
        }
        if (cval >= breaks[3] & cval < breaks[4]){
            col.vals[i] <- col.pal[3]
        }
        if (cval >= breaks[4] & cval < breaks[5]){
            col.vals[i] <- col.pal[4]
        }
        if (cval >= breaks[5] & cval < breaks[6]){
            col.vals[i] <- col.pal[5]
        }
        if (cval >= breaks[6] & cval < breaks[7]){
            col.vals[i] <- col.pal[6]
        }
        if (cval >= breaks[7] & cval < breaks[8]){
            col.vals[i] <- col.pal[7]
        }
        if (cval >= breaks[8] & cval < breaks[9]){
            col.vals[i] <- col.pal[8]
        }
        if (cval >= breaks[9] & cval < breaks[10]){
            col.vals[i] <- col.pal[9]
        }
        if (cval >= breaks[10] & cval < breaks[11]){
            col.vals[i] <- col.pal[10]
        }
        if (cval >= breaks[11]){
            col.vals[i] <- col.pal[11]
        }
    } ## end for loop
    return(col.vals)
}

pdf("gamma-cor-balanced-newIDs.pdf", height=6.5, width=10)

hist(gamma.hat, nclass=50, col=rgb(0.6, 0.933, 1, 1), prob=TRUE, ylim=c(0,3.5), main="", xlab=expression(paste("E[", gamma[i], "|Y]", sep="")),
     xlim=c(-.2, pi/2), yaxt="n", ylab="")

text(x=gamma.range, y=3.2, labels=round(truth.cor, 2), cex=0.7)
text(x=gamma.range, y=3.6, labels=round(valence.cor, 2), cex=0.7)
points(x=gamma.range, y=rep(3.3, 15), pch=15,
       col=cor.2.col(truth.cor, col.pal))
points(x=gamma.range, y=rep(3.5, 15), pch=15,
       col=cor.2.col(valence.cor, col.pal),)
text(x=-0.1, y=3.25, labels="Correlation w/ Objective Truth", cex=0.7)
text(x=-0.1, y=3.55, labels="Correlation w/ Valence", cex=0.7)

dev.off()


R.statements <- statements$textidfinal[statements$valence=="R"]
L.statements <- statements$textidfinal[statements$valence=="L"]


col.red <- brewer.pal(9, "Reds")[6]
col.red.light <- brewer.pal(9, "Reds")[4]
col.blue <- brewer.pal(9, "Blues")[7]
col.blue.light <- brewer.pal(9, "Blues")[4]
col.purp <- brewer.pal(9, "Purples")[4]
col.purp.dark <- brewer.pal(9, "Purples")[7]

pdf("theta-gamma-plot-valence-truth-newIDs.pdf", height=7*.6, width=20*.6)
par(mfrow=c(1,3))

## plot 1: truth
plot(theta1.hat, theta2.hat, xlab="Theta Dimension 1", ylab="Theta Dimension 2",
     type="n", xlim=c(-1.65, 2.2), ylim=c(-1.65, 2.2), main="(a)")


true.statements <- statements$textidfinal[statements$groupedRating == "high-truth"]
true.statements.1 <- paste("theta1", true.statements, sep=".")
true.statements.2 <- paste("theta2", true.statements, sep=".")

false.statements <- statements$textidfinal[statements$groupedRating == "low-truth"]
false.statements.1 <- paste("theta1", false.statements, sep=".")
false.statements.2 <- paste("theta2", false.statements, sep=".")

true.indic.1 <- names(theta1.hat) %in% true.statements.1 
true.indic.2 <- names(theta2.hat) %in% true.statements.2 
false.indic.1 <- names(theta1.hat) %in% false.statements.1 
false.indic.2 <- names(theta2.hat) %in% false.statements.2 

points(theta1.hat[true.indic.1], theta2.hat[true.indic.2],
       pch=15)
points(theta1.hat[false.indic.1], theta2.hat[false.indic.2],
       pch=0, col="gray")

arrows(x0=0, y0=0, x1=cos(min(gamma.hat)), y1=sin(min(gamma.hat)),
       length=0.05 )
text(x=cos(min(gamma.hat)) + 0.25, y=sin(min(gamma.hat)),
     labels="g(0.18)", cex=0.7)

arrows(x0=0, y0=0, x1=cos(max(gamma.hat)), y1=sin(max(gamma.hat)),
       length=0.05 )
text(x=cos(max(gamma.hat)) + 0.25, y=sin(max(gamma.hat)),
     labels="g(1.41)", cex=0.7)

arrows(x0=0, y0=0, x1=cos(0.77), y1=sin(0.77),
       length=0.05 )
text(x=cos(0.77) + 0.25, y=sin(0.77),
     labels="g(0.77)", cex=0.7)

legend(x=0.9, y=2.25,
       legend=c("High Truth Value", "Low Truth Value"),
       pch=c(15, 0), cex=0.9, col=c("black", "gray"))



## plot 2: valence
plot(theta1.hat, theta2.hat, xlab="Theta Dimension 1", ylab="Theta Dimension 2",
     type="n", xlim=c(-1.65, 2.2), ylim=c(-1.65, 2.2), main="(b)")

R.statements.1 <- paste("theta1", R.statements, sep=".")
R.statements.2 <- paste("theta2", R.statements, sep=".")
L.statements.1 <- paste("theta1", L.statements, sep=".")
L.statements.2 <- paste("theta2", L.statements, sep=".")

R.indic.1 <- names(theta1.hat) %in% R.statements.1 
R.indic.2 <- names(theta2.hat) %in% R.statements.2 
L.indic.1 <- names(theta1.hat) %in% L.statements.1 
L.indic.2 <- names(theta2.hat) %in% L.statements.2 
other.indic.1 <- !(R.indic.1 | L.indic.1)
other.indic.2 <- !(R.indic.2 | L.indic.2)

points(theta1.hat[R.indic.1], theta2.hat[R.indic.2],
       pch=16, col=col.red)
points(theta1.hat[L.indic.1], theta2.hat[L.indic.2],
       pch=17, col=col.blue)
points(theta1.hat[other.indic.1], theta2.hat[other.indic.2],
       pch=3, col=col.purp)

arrows(x0=0, y0=0, x1=cos(min(gamma.hat)), y1=sin(min(gamma.hat)),
       length=0.05 )
text(x=cos(min(gamma.hat)) + 0.25, y=sin(min(gamma.hat)),
     labels="g(0.18)", cex=0.7)

arrows(x0=0, y0=0, x1=cos(max(gamma.hat)), y1=sin(max(gamma.hat)),
       length=0.05 )
text(x=cos(max(gamma.hat)) + 0.25, y=sin(max(gamma.hat)),
     labels="g(1.41)", cex=0.7)

arrows(x0=0, y0=0, x1=cos(0.77), y1=sin(0.77),
       length=0.05 )
text(x=cos(0.77) + 0.25, y=sin(0.77),
     labels="g(0.77)", cex=0.7)

legend(x=0.9, y=2.25,
       legend=c("Left Valence", "Other", "Right Valence"),
       pch=c(17, 3, 16), col=c(col.blue, col.purp, col.red), cex=0.9)




## plot 3: partisanship + truth
R.statements.true <- statements$textidfinal[statements$valence=="R" & statements$groupedRating == "high-truth"]
R.statements.false <- statements$textidfinal[statements$valence=="R" & statements$groupedRating == "low-truth"]
L.statements.true <- statements$textidfinal[statements$valence=="L" & statements$groupedRating == "high-truth"]
L.statements.false <- statements$textidfinal[statements$valence=="L" & statements$groupedRating == "low-truth"]


plot(theta1.hat, theta2.hat, xlab="Theta Dimension 1", ylab="Theta Dimension 2",
     type="n", xlim=c(-1.65, 2.2), ylim=c(-1.65, 2.2), main="(c)")

R.statements.true.1 <- paste("theta1", R.statements.true, sep=".")
R.statements.true.2 <- paste("theta2", R.statements.true, sep=".")
R.statements.false.1 <- paste("theta1", R.statements.false,
                                     sep=".")
R.statements.false.2 <- paste("theta2", R.statements.false,
                                     sep=".")

R.true.indic.1 <- names(theta1.hat) %in% R.statements.true.1 
R.true.indic.2 <- names(theta2.hat) %in% R.statements.true.2 
R.false.indic.1 <- names(theta1.hat) %in% R.statements.false.1 
R.false.indic.2 <- names(theta2.hat) %in% R.statements.false.2 


L.statements.true.1 <- paste("theta1", L.statements.true, sep=".")
L.statements.true.2 <- paste("theta2", L.statements.true, sep=".")
L.statements.false.1 <- paste("theta1", L.statements.false,
                                      sep=".")
L.statements.false.2 <- paste("theta2", L.statements.false,
                                      sep=".")

L.true.indic.1 <- names(theta1.hat) %in% L.statements.true.1 
L.true.indic.2 <- names(theta2.hat) %in% L.statements.true.2 
L.false.indic.1 <- names(theta1.hat) %in% L.statements.false.1 
L.false.indic.2 <- names(theta2.hat) %in% L.statements.false.2 

other.true.indic.1 <- other.indic.1 & true.indic.1
other.true.indic.2 <- other.indic.2 & true.indic.2
other.false.indic.1 <- other.indic.1 & false.indic.1
other.false.indic.2 <- other.indic.2 & false.indic.2

points(theta1.hat[R.true.indic.1],
       theta2.hat[R.true.indic.2],
       pch=16, col=col.red)
points(theta1.hat[R.false.indic.1],
       theta2.hat[R.false.indic.2],
       pch=1, col=col.red.light)

points(theta1.hat[L.true.indic.1],
       theta2.hat[L.true.indic.2],
       pch=17, col=col.blue)
points(theta1.hat[L.false.indic.1],
       theta2.hat[L.false.indic.2],
       pch=2, col=col.blue.light)

points(theta1.hat[other.true.indic.1],
       theta2.hat[other.true.indic.2],
       pch=3, col=col.purp.dark)
points(theta1.hat[other.false.indic.1],
       theta2.hat[other.false.indic.2],
       pch=3, col=col.purp)

arrows(x0=0, y0=0, x1=cos(min(gamma.hat)), y1=sin(min(gamma.hat)),
       length=0.05 )
text(x=cos(min(gamma.hat)) + 0.25, y=sin(min(gamma.hat)),
     labels="g(0.18)", cex=0.7)

arrows(x0=0, y0=0, x1=cos(max(gamma.hat)), y1=sin(max(gamma.hat)),
       length=0.05 )
text(x=cos(max(gamma.hat)) + 0.25, y=sin(max(gamma.hat)),
     labels="g(1.41)", cex=0.7)

arrows(x0=0, y0=0, x1=cos(0.77), y1=sin(0.77),
       length=0.05 )
text(x=cos(0.77) + 0.25, y=sin(0.77),
     labels="g(0.77)", cex=0.7)



legend(x=0.5, y=2.25,
       legend=c("Left Valence & High Truth", "Left Valence & Low Truth",
                "Other & High Truth", "Other & Low Truth",
                "Right Valence & High Truth", "Right Valence & Low Truth"),
       pch=c(17, 2, 3, 3, 16, 1), col=c(col.blue, col.blue.light,
                                        col.purp.dark, col.purp,
                                        col.red, col.red.light), cex=0.9)





dev.off()






















